library(dplyr)
library(readr)
library(ggplot2)
library(openxlsx)
library(knitr)
library(tibble)
library(stringr)
library(stringi)
library(readxl)
library(lubridate)
library(shiny)
library(plotly)
# Data preparation -------------------------------------------------------
# Loads the ODK export, derives per-event durations, decodes questions and
# answers via the codebook, and measures how long it took until an answer
# was changed.

# Unzip the ODK archive and read the CSV it contains. The helper
# extract_data_from_odk_zip is defined outside this chunk.
df <- as.data.frame(
  extract_data_from_odk_zip(params$file_path_zip, params$file_name_csv)
)
# Convert start/end with the project helper format_date_ms (per the original
# note: integer milliseconds to time stamps).
df$start <- format_date_ms(df$start)
df$end <- format_date_ms(df$end)
# Keep only events that started after 18 July 2021.
df <- subset(df, as.Date(start) > as.Date("18.07.2021", "%d.%m.%Y"))
# Duration of every event in whole seconds. The unit is fixed explicitly:
# a plain `end - start` lets difftime pick secs/mins/hours/days by itself,
# and as.numeric() would then silently return a number in that unit.
df$time_spent <- round(
  as.numeric(difftime(df$end, df$start, units = "secs"))
)
# Reduce each node string to the question name (helper defined elsewhere).
df$question <- sapply(df$node, create_question)
df <- decode_question(df, df$question, params$codebook)
df <- decode_categories(df, params$codebook)
df <- df %>%
  # Order rows so that repeated events on the same node of the same
  # instance sit next to each other, oldest first.
  arrange(`instance ID`, node, start) %>%
  # Two placeholder columns for the derived features. Only time_till_change
  # is filled below; nothing in this chunk writes changed_from.
  add_column(time_till_change = NA) %>%
  add_column(changed_from = NA)
# For every row whose old value equals the new value of the preceding row,
# store the seconds between the end of the preceding event and the start of
# this one. The loop starts at row 2 because row 1 has no predecessor, and
# seq_len() keeps it a no-op on an empty data frame.
for (i in seq_len(nrow(df))[-1]) {
  # Only pair rows that belong to the same instance and node; otherwise two
  # unrelated questions sharing a code such as "1" would count as a change.
  same_question <-
    isTRUE(df$`instance ID`[i] == df$`instance ID`[i - 1]) &&
    isTRUE(df$node[i] == df$node[i - 1])
  # isTRUE() treats a missing old or new value as "no match".
  value_carried_over <-
    isTRUE(df$`old-value`[i] == df$`new-value`[i - 1])
  if (same_question && value_carried_over) {
    # Explicit seconds: a single-value difftime switches to minutes as soon
    # as the gap reaches 60 s, which would understate long gaps.
    df$time_till_change[i] <- round(as.numeric(
      difftime(df$start[i], df$end[i - 1], units = "secs")
    ))
  }
}
head(df)
| instance ID | event | node | start | end | latitude | longitude | accuracy | old-value | new-value | time_spent | question | question_decoded | new_value_decoded | old_value_decoded | time_till_change | changed_from |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uuid:00021afc-7e8e-40c4-82ba-66b2a2a60496 | group questions | /data/a1 | 2021-08-30 13:02:55 | 2021-08-30 13:03:25 | NA | NA | NA | NA | NA | 29 | a1 | NA | NA | NA | NA | NA |
| uuid:00021afc-7e8e-40c4-82ba-66b2a2a60496 | question | /data/a1/a1_a_4a | 2021-08-30 13:02:55 | 2021-08-30 13:03:25 | NA | NA | NA | NA | T-F0014-P0223 | 29 | a1_a_4a | If QR code scanning is not possible, please manually enter the participant identification code | T-F0014-P0223 | NA | NA | NA |
| uuid:00021afc-7e8e-40c4-82ba-66b2a2a60496 | group questions | /data/b1 | 2021-08-30 13:02:54 | 2021-08-30 13:02:55 | NA | NA | NA | NA | NA | 1 | b1 | NA | NA | NA | NA | NA |
| uuid:00021afc-7e8e-40c4-82ba-66b2a2a60496 | group questions | /data/b2 | 2021-08-30 13:07:05 | 2021-08-30 13:08:32 | NA | NA | NA | NA | NA | 86 | b2 | NA | NA | NA | NA | NA |
| uuid:00021afc-7e8e-40c4-82ba-66b2a2a60496 | group questions | /data/b2 | 2021-08-30 13:08:34 | 2021-08-30 13:09:24 | NA | NA | NA | NA | NA | 50 | b2 | NA | NA | NA | NA | NA |
| uuid:00021afc-7e8e-40c4-82ba-66b2a2a60496 | question | /data/b2/b1_7 | 2021-08-30 13:07:05 | 2021-08-30 13:08:32 | NA | NA | NA | NA | 1 | 86 | b1_7 | Is this facility the closest health facility to your home? | Yes | NA | NA | NA |
# Print per-column summary statistics of the prepared event data frame
summary(df)
## instance ID event node start
## Length:14117 Length:14117 Length:14117 Min. :2021-07-20 11:55:08
## Class :character Class :character Class :character 1st Qu.:2021-07-28 13:38:51
## Mode :character Mode :character Mode :character Median :2021-08-20 11:40:39
## Mean :2021-08-14 02:07:37
## 3rd Qu.:2021-08-26 09:46:43
## Max. :2021-08-31 18:51:19
##
## end latitude longitude accuracy
## Min. :2021-07-20 11:55:09 Mode:logical Mode:logical Mode:logical
## 1st Qu.:2021-07-28 13:40:22 NA's:14117 NA's:14117 NA's:14117
## Median :2021-08-20 11:26:44
## Mean :2021-08-14 00:59:12
## 3rd Qu.:2021-08-26 09:44:05
## Max. :2021-08-31 18:51:18
## NA's :1986
## old-value new-value time_spent question
## Length:14117 Length:14117 Min. : 0.0 Length:14117
## Class :character Class :character 1st Qu.: 6.0 Class :character
## Mode :character Mode :character Median : 17.0 Mode :character
## Mean : 33.2
## 3rd Qu.: 34.0
## Max. :9537.0
## NA's :1986
## question_decoded new_value_decoded old_value_decoded time_till_change
## Length:14117 Length:14117 Length:14117 Min. : 1.00
## Class :character Class :character Class :character 1st Qu.: 2.00
## Mode :character Mode :character Mode :character Median : 5.00
## Mean :12.10
## 3rd Qu.:14.25
## Max. :54.00
## NA's :14037
## changed_from
## Mode:logical
## NA's:14117
##
##
##
##
##
# Headline figures used in the report text below.
# Number of distinct form instances
no_inst <- n_distinct(df$`instance ID`)
# Number of logged events (one row per event)
no_event <- dim(df)[1]
# First and last calendar day covered; missing end times are ignored
earliest_start <- as.Date(min(df$start))
latest_end <- as.Date(max(df$end, na.rm = TRUE))
Total number of instances: 307
Total number of events/questions: 14117
Examination period: 2021-07-20 - 2021-08-31
# Number of events/questions started on each calendar day
df_by_day <- df %>%
  mutate(start_date = as.Date(start)) %>%
  count(start_date, name = "count")
# Daily counts as a line, with a loess smoother (no confidence band) on top
gg1 <- ggplot(df_by_day, aes(x = start_date, y = count)) +
  geom_line() +
  geom_smooth(alpha = 0.5, colour = "red", method = "loess", se = FALSE) +
  labs(
    title = "Number of Events/Questions Started by Day with Smoothed Regression Line",
    y = "Number of Questions/Events Started",
    x = "Start Date"
  ) +
  theme_light()
gg1
# Events started per weekday (week_start = 1) and hour of day
df_wday_hour <- df %>%
  mutate(
    wday = wday(start, label = TRUE, week_start = 1),
    hour = hour(start)
  ) %>%
  count(wday, hour, name = "count_wday_hour") %>%
  arrange(desc(wday))

# Minimal theme for the heatmap: no grid, border, ticks, legend or x title
theme_heatmap <- theme_light() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 11, hjust = 0.5),
    panel.grid = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 8)
  )

# One tile per weekday/hour, coloured and labelled with its event count;
# weekdays are shown along the top and hours run downwards from 0 to 23
gg2 <- ggplot(
  df_wday_hour,
  aes(x = wday, y = hour, fill = count_wday_hour)
) +
  geom_tile(colour = "white") +
  geom_text(aes(label = count_wday_hour), size = 2) +
  scale_x_discrete(expand = c(0, 0), position = "top") +
  scale_y_reverse(breaks = 23:0, labels = 23:0, expand = c(0, 0)) +
  scale_fill_gradient(low = "#fff0f0", high = "#940606") +
  labs(
    title = "Number of Started Events/Questions by Day of Week / Hour of Day",
    y = "Hour of Day"
  ) +
  theme_heatmap
gg2
# Keep the events whose duration lies below the 95th percentile; rows with
# a missing duration drop out because which() ignores NA comparisons
df_clean <- df[
  which(df$time_spent < quantile(df$time_spent, 0.95, na.rm = TRUE)),
  ,
  drop = FALSE
]
# Distribution of the remaining durations, converted from seconds to minutes
hist(
  df_clean$time_spent[!is.na(df_clean$time_spent)] / 60,
  breaks = 20,
  main = "Histogram of the Time Spent by Question",
  xlab = "Time Spent in Minutes"
)
# Median time spent per decoded question, longest first.
df_median_time_per_question <- df %>%
  filter(event == "question") %>%
  group_by(question_decoded) %>%
  # na.rm: one event without an end time must not turn the whole question's
  # median into NA
  summarise(median_time_spent = median(time_spent, na.rm = TRUE)) %>%
  arrange(desc(median_time_spent)) %>%
  # Round the seconds before converting: rounding the Period afterwards only
  # touches its seconds part and can display "60S" instead of carrying over
  mutate(median_time_spent = seconds_to_period(round(median_time_spent)))
df_median_time_per_question
| question_decoded | median_time_spent |
|---|---|
| Were you given a paper or record to take with you for completing the referral? | 1M 34S |
| Were you told why to go? | 1M 34S |
| What do you intend to do now? | 1M 34S |
| When do you need to complete the referral? | 1M 34S |
| Can you specify these signs and symptoms? | 53S |
| Were you told where to go? | 41S |
| If QR code scanning is not possible, please manually enter the participant identification code | 30S |
| Did the provider speak in a language you understand? | 28S |
| Did you feel the provider treated you and the child with respect? | 28S |
| Did you find the provider showed concern and empathy? | 28S |
| Did you find the provider was kind to you? | 28S |
| How do you feel overall with the service you received at the facility today? | 28S |
| Was the service delayed or were you kept waiting for a long time? | 28S |
| Would you recommend this facility to a friend / family with a sick child? | 28S |
| Did you pay for something at the facility today? | 26S |
| Did you miss work to bring the child to the facility today? | 26S |
| Do you intend to buy some medicines outside of the facility? | 26S |
| Is this facility the closest health facility to your home? | 26S |
| Were you informed of signs / symptoms that require you to bring the child back to the facility immediately? | 26S |
| What do you intend to do if the sick child does not get completely better or become worse? | 26S |
| Were you given general information or advice about feeding or breastfeeding? | 25S |
| Can you explain to me why this device was used? | 24S |
| Please scan the participant’s QR code | 18S |
| Can you show me all the medicines and prescriptions that you received? | 17S |
| Did the provider explain to you how to give these medicines to the child at home? | 17S |
| How confident do you feel in how much of the medication to give each day and how many days to give it? | 17S |
| How did you feel with the fact that the provider used of a tablet for the consultation of the child? | 16S |
| Did the provider explain to you the result that was given by the device? | 14S |
| Did the provider give or prescribe any medicines for the child to take home? | 13S |
| Did the provider refer the child? | 13S |
| Did the provider tell you what illness your child has? | 13S |
| Please select the current district | 12S |
| Did the provider use the device that is represented in the following picture during the consultation of the child? | 9S |
| fcode | 9S |
| Did the provider use a tablet like this one for the consultation of the child? | 5S |
# Per decoded question: how often an answer was changed, plus the median and
# standard deviation of the time until the change. Questions with a single
# change are dropped.
df_changes_per_question <- df %>%
  filter(
    event == "question",
    !is.na(time_till_change)
  ) %>%
  group_by(question_decoded) %>%
  summarise(
    count_input_changes = n(),
    median_time_till_change = median(time_till_change),
    sd_time_till_change = sd(time_till_change)
  ) %>%
  # Filter before converting so that the NA standard deviation of
  # single-change questions never reaches seconds_to_period()
  filter(count_input_changes > 1) %>%
  arrange(desc(count_input_changes)) %>%
  # Round the raw seconds first; rounding a Period only rounds its seconds
  # part and can display "60S" instead of rolling over into the minutes
  mutate(
    median_time_till_change =
      seconds_to_period(round(median_time_till_change)),
    sd_time_till_change =
      seconds_to_period(round(sd_time_till_change, 1))
  )
df_changes_per_question
| question_decoded | count_input_changes | median_time_till_change | sd_time_till_change |
|---|---|---|---|
| Did the provider explain to you how to give these medicines to the child at home? | 10 | 4S | 16.8S |
| Can you show me all the medicines and prescriptions that you received? | 9 | 8S | 17.1S |
| Was the service delayed or were you kept waiting for a long time? | 7 | 5S | 16S |
| Do you intend to buy some medicines outside of the facility? | 6 | 11S | 8.4S |
| How confident do you feel in how much of the medication to give each day and how many days to give it? | 6 | 4S | 19.1S |
| How do you feel overall with the service you received at the facility today? | 5 | 5S | 11.6S |
| If QR code scanning is not possible, please manually enter the participant identification code | 5 | 22S | 23.6S |
| Would you recommend this facility to a friend / family with a sick child? | 5 | 6S | 13.6S |
| Did you pay for something at the facility today? | 4 | 3S | 1.6S |
| Were you informed of signs / symptoms that require you to bring the child back to the facility immediately? | 4 | 24S | 19.4S |
| Were you given general information or advice about feeding or breastfeeding? | 3 | 2S | 1.2S |
| Can you explain to me why this device was used? | 2 | 2S | 0S |
| Did the provider tell you what illness your child has? | 2 | 4S | 2.1S |
| Did the provider use the device that is represented in the following picture during the consultation of the child? | 2 | 6S | 2.1S |
| Did you feel the provider treated you and the child with respect? | 2 | 16S | 19.8S |
| Did you find the provider showed concern and empathy? | 2 | 2S | 1.4S |
# Most frequent (old answer -> new answer) pairs per question among the rows
# flagged as answer changes; pairs seen only once are dropped
df_stream <- df %>%
  filter(!is.na(time_till_change)) %>%
  count(
    question_decoded,
    old_value_decoded,
    new_value_decoded,
    name = "count_value_pairs"
  ) %>%
  filter(count_value_pairs >= 2) %>%
  arrange(desc(count_value_pairs))
df_stream
| question_decoded | old_value_decoded | new_value_decoded | count_value_pairs |
|---|---|---|---|
| Can you show me all the medicines and prescriptions that you received? | All medicines received, no unfilled prescriptions | Some medicines and some unfilled prescriptions | 4 |
| Did the provider explain to you how to give these medicines to the child at home? | Yes, for all medicines | Yes, but only for some medicines | 3 |
| Would you recommend this facility to a friend / family with a sick child? | Strongly agree | Agree | 3 |
| Can you show me all the medicines and prescriptions that you received? | Some medicines and some unfilled prescriptions | Prescriptions only, no medicines | 2 |
| Did the provider explain to you how to give these medicines to the child at home? | Yes, but only for some medicines | Yes, for all medicines | 2 |
| Did the provider explain to you how to give these medicines to the child at home? | Yes, for all medicines | No | 2 |
| Did you feel the provider treated you and the child with respect? | Agree | Strongly agree | 2 |
| Did you pay for something at the facility today? | No | Yes | 2 |
| Do you intend to buy some medicines outside of the facility? | No | Yes, prescribed by the healthcare provider but not available at the facility | 2 |
| How confident do you feel in how much of the medication to give each day and how many days to give it? | Very confident | Neutral | 2 |
| How do you feel overall with the service you received at the facility today? | Very satisfied | Somewhat satisfied | 2 |
| Was the service delayed or were you kept waiting for a long time? | Agree | Neither agree nor disagree | 2 |
| Was the service delayed or were you kept waiting for a long time? | Strongly agree | Disagree | 2 |
| Were you informed of signs / symptoms that require you to bring the child back to the facility immediately? | Yes | No | 2 |
# Instances whose total duration (first start to last end) lies above the
# 90th percentile, longest first.
df_duration_per_inst <- df %>%
  group_by(`instance ID`) %>%
  summarise(
    # Explicit seconds: a bare subtraction lets difftime choose a different
    # unit for each instance
    duration_per_inst = as.numeric(difftime(
      max(end, na.rm = TRUE),
      min(start, na.rm = TRUE),
      units = "secs"
    ))
  ) %>%
  filter(
    duration_per_inst > quantile(duration_per_inst, 0.9, na.rm = TRUE)
  ) %>%
  # Sort on the plain number of seconds, before the conversion to a Period
  arrange(desc(duration_per_inst)) %>%
  # Round first, then convert: rounding the Period itself only rounds its
  # seconds part and produced values such as "20H 51M 60S"
  mutate(duration_per_inst = seconds_to_period(round(duration_per_inst)))
df_duration_per_inst
| instance ID | duration_per_inst |
|---|---|
| uuid:50e579ce-3dde-43f0-9aec-d524233cfcb0 | 13d 8H 41M 3S |
| uuid:92b7bd54-b06e-4c24-b7eb-44ef3de7d10f | 3d 2H 19M 12S |
| uuid:9b0ac8c9-6a22-441b-aad3-0df639715b21 | 20H 51M 60S |
| uuid:894e09b4-b086-4f3d-b5ad-9b37c3d7db5e | 10H 3M 3S |
| uuid:f0797482-3b4c-49f8-ac80-362fb9f4fb06 | 8H 45M 29S |
| uuid:90e19e6b-9fc5-4776-af28-921a55c7664d | 8H 36M 34S |
| uuid:096ab426-0473-442d-8441-d661ce7d2ec2 | 8H 32M 42S |
| uuid:5eb6622a-9327-4d22-b580-016d9913a435 | 8H 30M 42S |
| uuid:b9e5bde0-3ba1-4e54-921f-ead49247c45f | 8H 28M 30S |
| uuid:d4d74cf1-e3db-42f5-9a9d-fbc463ba9abf | 8H 0M 54S |
| uuid:0c971016-85d3-4892-998c-e7b3f0125309 | 7H 56M 37S |
| uuid:a60e6235-ec17-4730-8eff-37c764cd77d8 | 7H 50M 20S |
| uuid:1a1ac120-825c-4edf-a418-43674dd58c40 | 7H 37M 26S |
| uuid:1e70b4cc-4d97-4697-96b9-f89b5cc84bb4 | 7H 35M 31S |
| uuid:9da2333c-6ff3-4f6d-9f86-b8438195bc73 | 7H 33M 26S |
| uuid:bae4f3d0-c176-4f02-8f9c-c4cd88819f11 | 7H 28M 17S |
| uuid:d352bd5c-335c-44d3-9ae1-7c7871bcb28e | 7H 11M 54S |
| uuid:8c88164d-b0e7-4f35-8e59-56c237eb5330 | 6H 59M 27S |
| uuid:b86b9b2a-7920-47e4-8008-05e6f3b2fd72 | 6H 58M 55S |
| uuid:e4ef13de-9892-48e0-8b80-3a3fd7a157b9 | 6H 55M 59S |
| uuid:cfe21b8e-3b41-4907-b591-90b4c390e124 | 6H 1M 32S |
| uuid:25c32682-b91c-4e73-accf-fc3a09adae30 | 5H 45M 47S |
| uuid:a0a371b0-dcaf-4f8c-9dd5-919203561784 | 5H 39M 52S |
| uuid:272b75c7-c69e-4fa0-91c2-262cab9f50f0 | 5H 36M 29S |
| uuid:82fa132a-248a-4a56-8079-1a33d7aa6ed9 | 5H 33M 0S |
| uuid:538512f3-d12e-4502-9d64-8034df81fb62 | 5H 29M 8S |
| uuid:9d162283-ea8a-460b-b851-7df408406ede | 5H 26M 42S |
| uuid:fc3d50fe-4d9a-4708-8aeb-277e2c660866 | 5H 22M 23S |
| uuid:1c0f8e5d-b732-477f-8d43-c5f8753c61c5 | 5H 22M 16S |
| uuid:5842458a-51ee-47d4-92dd-c860d6bc871d | 5H 21M 59S |
| uuid:3881af79-ebe5-4ec7-ace6-fd2d7092fccc | 5H 14M 51S |
# Total duration per instance, keeping only those below the 90th percentile
df_subsetted <- df %>%
  group_by(`instance ID`) %>%
  summarise(
    # Fix the unit to seconds; otherwise difftime picks a unit by itself and
    # the conversion to minutes below would be wrong
    duration_per_inst = difftime(
      max(end, na.rm = TRUE),
      min(start, na.rm = TRUE),
      units = "secs"
    )
  ) %>%
  filter(
    duration_per_inst < quantile(duration_per_inst, 0.9, na.rm = TRUE)
  )
# Histogram of the durations, converted to minutes by the difftime method
hist(
  as.numeric(df_subsetted$duration_per_inst, units = "mins"),
  breaks = 30,
  main = "Duration per Instance in Minutes (outliers removed)",
  xlab = "Duration in Minutes"
)
# Answer changes that took longer than the 90th percentile of all measured
# change times, slowest first, reduced to the columns shown in the report
df_time_till_change_outliers <- df %>%
  select(
    `instance ID`,
    question_decoded,
    old_value_decoded,
    new_value_decoded,
    time_till_change
  ) %>%
  filter(
    time_till_change > quantile(df$time_till_change, 0.9, na.rm = TRUE)
  ) %>%
  arrange(desc(time_till_change)) %>%
  mutate(time_till_change = round(seconds_to_period(time_till_change)))
df_time_till_change_outliers
| instance ID | question_decoded | old_value_decoded | new_value_decoded | time_till_change |
|---|---|---|---|---|
| uuid:9abf7a26-9060-43de-a344-bad7ae1ecb1c | What do you intend to do if the sick child does not get completely better or become worse? | Return to this facility | Not sure | 54S |
| uuid:1c0f8e5d-b732-477f-8d43-c5f8753c61c5 | If QR code scanning is not possible, please manually enter the participant identification code | T-F0014-P0150 | T-F0014-P0222 | 50S |
| uuid:fc3d50fe-4d9a-4708-8aeb-277e2c660866 | If QR code scanning is not possible, please manually enter the participant identification code | T-F0014-P0146 | T-F0014-P0218 | 48S |
| uuid:388e775f-8ecc-4271-94e7-9c1d079af8a8 | Was the service delayed or were you kept waiting for a long time? | Agree | Neither agree nor disagree | 46S |
| uuid:46717996-f9dd-403b-9f85-2e519d1b0939 | Can you show me all the medicines and prescriptions that you received? | Prescriptions only, no medicines | Some medicines and some unfilled prescriptions | 46S |
| uuid:46717996-f9dd-403b-9f85-2e519d1b0939 | Did the provider explain to you how to give these medicines to the child at home? | No | Yes, but only for some medicines | 46S |
| uuid:46717996-f9dd-403b-9f85-2e519d1b0939 | How confident do you feel in how much of the medication to give each day and how many days to give it? | Neutral | Very confident | 46S |
| uuid:46717996-f9dd-403b-9f85-2e519d1b0939 | Were you informed of signs / symptoms that require you to bring the child back to the facility immediately? | Yes | No | 46S |
# Flag instances whose events are bunched in time: the span of an instance's
# start times is cut into 10 equal-width bins, and the instance counts as
# irregular when fewer than 5 of those bins contain an event.
# Filter() keeps the IDs in order of first appearance and avoids growing the
# result with c() inside a loop; with no matches it yields an empty
# character vector (length 0).
irregular_inst <- Filter(
  function(id) {
    bin_vec <- cut(
      df$start[df$`instance ID` == id],
      breaks = 10,
      labels = FALSE
    )
    length(unique(bin_vec)) < 5
  },
  unique(df$`instance ID`)
)
paste0(length(irregular_inst), " out of ", length(unique(df$`instance ID`)), " instances were found to have an inconsistent filling behaviour.")
## [1] "198 out of 307 instances were found to have an inconsistent filling behaviour."
# For every irregular instance: overlay a histogram of its events across ten
# equal-width time slices, and collect the decoded questions that fall into
# the last slice.
last_bin_questions <- c()
fig <- plot_ly(alpha = 0.1)
for (id in irregular_inst) {
  # Events of the current instance only
  temp_df <- df[df$`instance ID` == id, ]
  # Assign each event to one of ten labelled slices of the instance's
  # time span
  temp_df$cut <- cut(
    temp_df$start,
    breaks = 10,
    labels = c(
      "1. Part", "2. Part", "3. Part", "4. Part", "5. Part",
      "6. Part", "7. Part", "8. Part", "9. Part", "10. Part"
    )
  )
  # One histogram trace per instance, named after its ID
  fig <- add_histogram(fig, x = temp_df$cut, name = id)
  last_bin_questions <- c(
    last_bin_questions,
    temp_df$question_decoded[temp_df$cut == "10. Part"]
  )
}
# Draw the traces on top of each other rather than side by side
fig <- layout(fig, barmode = "overlay")
fig
# Frequency table of the questions seen in the last slice, most common first
kable(arrange(as.data.frame(table(last_bin_questions)), desc(Freq)))
| last_bin_questions | Freq |
|---|---|
| Did you pay for something at the facility today? | 13 |
| Did you miss work to bring the child to the facility today? | 11 |
| Do you intend to buy some medicines outside of the facility? | 11 |
| Is this facility the closest health facility to your home? | 10 |
| Was the service delayed or were you kept waiting for a long time? | 9 |
| Would you recommend this facility to a friend / family with a sick child? | 8 |
| Can you show me all the medicines and prescriptions that you received? | 7 |
| Did the provider explain to you how to give these medicines to the child at home? | 7 |
| Did the provider speak in a language you understand? | 6 |
| Did you find the provider showed concern and empathy? | 6 |
| Did you find the provider was kind to you? | 6 |
| How confident do you feel in how much of the medication to give each day and how many days to give it? | 6 |
| Did you feel the provider treated you and the child with respect? | 5 |
| How do you feel overall with the service you received at the facility today? | 5 |
| Were you informed of signs / symptoms that require you to bring the child back to the facility immediately? | 5 |
| What do you intend to do if the sick child does not get completely better or become worse? | 5 |
| Can you specify these signs and symptoms? | 4 |
| Were you given general information or advice about feeding or breastfeeding? | 4 |
| Did the provider give or prescribe any medicines for the child to take home? | 3 |
| Did the provider refer the child? | 3 |
| Did the provider tell you what illness your child has? | 3 |
| If QR code scanning is not possible, please manually enter the participant identification code | 3 |
| Can you explain to me why this device was used? | 2 |
| Did the provider use a tablet like this one for the consultation of the child? | 2 |
| Did the provider use the device that is represented in the following picture during the consultation of the child? | 2 |
| How did you feel with the fact that the provider used of a tablet for the consultation of the child? | 2 |